; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -passes='sroa<preserve-cfg>' -data-layout="e-n8:16:32:64" -S %s | FileCheck %s --check-prefixes=CHECK-ALL,CHECK-SCALAR,CHECK-SCALAR-64,CHECK-LE-64
; RUN: opt -passes='sroa<modify-cfg>' -data-layout="e-n8:16:32:64" -S %s | FileCheck %s --check-prefixes=CHECK-ALL,CHECK-SCALAR,CHECK-SCALAR-64,CHECK-LE-64
; RUN: opt -passes='sroa<preserve-cfg>' -data-layout="e-n8:16:32" -S %s | FileCheck %s --check-prefixes=CHECK-ALL,CHECK-SCALAR,CHECK-SCALAR-32,CHECK-LE-32
; RUN: opt -passes='sroa<modify-cfg>' -data-layout="e-n8:16:32" -S %s | FileCheck %s --check-prefixes=CHECK-ALL,CHECK-SCALAR,CHECK-SCALAR-32,CHECK-LE-32
; RUN: opt -passes='sroa<preserve-cfg>' -data-layout="E-n8:16:32:64" -S %s | FileCheck %s --check-prefixes=CHECK-ALL,CHECK-SCALAR,CHECK-SCALAR-64,CHECK-BE-64
; RUN: opt -passes='sroa<modify-cfg>' -data-layout="E-n8:16:32:64" -S %s | FileCheck %s --check-prefixes=CHECK-ALL,CHECK-SCALAR,CHECK-SCALAR-64,CHECK-BE-64
; RUN: opt -passes='sroa<preserve-cfg>' -data-layout="E-n8:16:32" -S %s | FileCheck %s --check-prefixes=CHECK-ALL,CHECK-SCALAR,CHECK-SCALAR-32,CHECK-BE-32
; RUN: opt -passes='sroa<modify-cfg>' -data-layout="E-n8:16:32" -S %s | FileCheck %s --check-prefixes=CHECK-ALL,CHECK-SCALAR,CHECK-SCALAR-32,CHECK-BE-32

; Reads a 1-byte chunk at a runtime byte offset from a 2-byte stack slot whose
; upper half is zero.
;   %src     - pointer from which the 1 initializing byte is loaded (align 1)
;   %byteOff - runtime byte offset into the slot at which the chunk is read
;   %dst     - pointer that receives the loaded <1 x i8> chunk
; The slot is first zero-filled in full, then its first byte is overwritten
; from %src, so byte 1 still holds zero when the chunk is loaded.
; Expected output: every instruction is kept; the final store is shown with an
; explicit `align 1`.
define void @load_1byte_chunk_of_2byte_alloca_with_zero_upper_half(ptr %src, i64 %byteOff, ptr %dst) nounwind {
; CHECK-ALL-LABEL: @load_1byte_chunk_of_2byte_alloca_with_zero_upper_half(
; CHECK-ALL-NEXT:    [[INTERMEDIATE:%.*]] = alloca [2 x i8], align 64
; CHECK-ALL-NEXT:    store <2 x i8> zeroinitializer, ptr [[INTERMEDIATE]], align 64
; CHECK-ALL-NEXT:    [[INIT:%.*]] = load <1 x i8>, ptr [[SRC:%.*]], align 1
; CHECK-ALL-NEXT:    store <1 x i8> [[INIT]], ptr [[INTERMEDIATE]], align 64
; CHECK-ALL-NEXT:    [[INTERMEDIATE_OFF_ADDR:%.*]] = getelementptr inbounds i8, ptr [[INTERMEDIATE]], i64 [[BYTEOFF:%.*]]
; CHECK-ALL-NEXT:    [[CHUNK:%.*]] = load <1 x i8>, ptr [[INTERMEDIATE_OFF_ADDR]], align 1
; CHECK-ALL-NEXT:    store <1 x i8> [[CHUNK]], ptr [[DST:%.*]], align 1
; CHECK-ALL-NEXT:    ret void
;
  %intermediate = alloca [2 x i8], align 64
  store <2 x i8> zeroinitializer,  ptr %intermediate, align 64
  %init = load <1 x i8>, ptr %src, align 1
  store <1 x i8> %init, ptr %intermediate, align 64
  %intermediate.off.addr = getelementptr inbounds i8, ptr %intermediate, i64 %byteOff
  %chunk = load <1 x i8>, ptr %intermediate.off.addr, align 1
  store <1 x i8> %chunk, ptr %dst
  ret void
}

; Reads a 1-byte chunk at a runtime byte offset from a 4-byte stack slot whose
; upper half is zero.
;   %src     - pointer from which the 2 initializing bytes are loaded (align 1)
;   %byteOff - runtime byte offset into the slot at which the chunk is read
;   %dst     - pointer that receives the loaded <1 x i8> chunk
; The slot is first zero-filled in full, then its first 2 bytes are overwritten
; from %src, so bytes [2, 4) still hold zero when the chunk is loaded.
; Expected output: every instruction is kept; the final store is shown with an
; explicit `align 1`.
define void @load_1byte_chunk_of_4byte_alloca_with_zero_upper_half(ptr %src, i64 %byteOff, ptr %dst) nounwind {
; CHECK-ALL-LABEL: @load_1byte_chunk_of_4byte_alloca_with_zero_upper_half(
; CHECK-ALL-NEXT:    [[INTERMEDIATE:%.*]] = alloca [4 x i8], align 64
; CHECK-ALL-NEXT:    store <4 x i8> zeroinitializer, ptr [[INTERMEDIATE]], align 64
; CHECK-ALL-NEXT:    [[INIT:%.*]] = load <2 x i8>, ptr [[SRC:%.*]], align 1
; CHECK-ALL-NEXT:    store <2 x i8> [[INIT]], ptr [[INTERMEDIATE]], align 64
; CHECK-ALL-NEXT:    [[INTERMEDIATE_OFF_ADDR:%.*]] = getelementptr inbounds i8, ptr [[INTERMEDIATE]], i64 [[BYTEOFF:%.*]]
; CHECK-ALL-NEXT:    [[CHUNK:%.*]] = load <1 x i8>, ptr [[INTERMEDIATE_OFF_ADDR]], align 1
; CHECK-ALL-NEXT:    store <1 x i8> [[CHUNK]], ptr [[DST:%.*]], align 1
; CHECK-ALL-NEXT:    ret void
;
  %intermediate = alloca [4 x i8], align 64
  store <4 x i8> zeroinitializer,  ptr %intermediate, align 64
  %init = load <2 x i8>, ptr %src, align 1
  store <2 x i8> %init, ptr %intermediate, align 64
  %intermediate.off.addr = getelementptr inbounds i8, ptr %intermediate, i64 %byteOff
  %chunk = load <1 x i8>, ptr %intermediate.off.addr, align 1
  store <1 x i8> %chunk, ptr %dst
  ret void
}

; Reads a 2-byte chunk at a runtime byte offset from a 4-byte stack slot whose
; upper half is zero.
;   %src     - pointer from which the 2 initializing bytes are loaded (align 1)
;   %byteOff - runtime byte offset into the slot at which the chunk is read
;   %dst     - pointer that receives the loaded <2 x i8> chunk
; The slot is first zero-filled in full, then its first 2 bytes are overwritten
; from %src, so bytes [2, 4) still hold zero when the chunk is loaded.
; Expected output: every instruction is kept; the final store is shown with an
; explicit `align 2`.
define void @load_2byte_chunk_of_4byte_alloca_with_zero_upper_half(ptr %src, i64 %byteOff, ptr %dst) nounwind {
; CHECK-ALL-LABEL: @load_2byte_chunk_of_4byte_alloca_with_zero_upper_half(
; CHECK-ALL-NEXT:    [[INTERMEDIATE:%.*]] = alloca [4 x i8], align 64
; CHECK-ALL-NEXT:    store <4 x i8> zeroinitializer, ptr [[INTERMEDIATE]], align 64
; CHECK-ALL-NEXT:    [[INIT:%.*]] = load <2 x i8>, ptr [[SRC:%.*]], align 1
; CHECK-ALL-NEXT:    store <2 x i8> [[INIT]], ptr [[INTERMEDIATE]], align 64
; CHECK-ALL-NEXT:    [[INTERMEDIATE_OFF_ADDR:%.*]] = getelementptr inbounds i8, ptr [[INTERMEDIATE]], i64 [[BYTEOFF:%.*]]
; CHECK-ALL-NEXT:    [[CHUNK:%.*]] = load <2 x i8>, ptr [[INTERMEDIATE_OFF_ADDR]], align 1
; CHECK-ALL-NEXT:    store <2 x i8> [[CHUNK]], ptr [[DST:%.*]], align 2
; CHECK-ALL-NEXT:    ret void
;
  %intermediate = alloca [4 x i8], align 64
  store <4 x i8> zeroinitializer,  ptr %intermediate, align 64
  %init = load <2 x i8>, ptr %src, align 1
  store <2 x i8> %init, ptr %intermediate, align 64
  %intermediate.off.addr = getelementptr inbounds i8, ptr %intermediate, i64 %byteOff
  %chunk = load <2 x i8>, ptr %intermediate.off.addr, align 1
  store <2 x i8> %chunk, ptr %dst
  ret void
}

; Reads a 1-byte chunk at a runtime byte offset from an 8-byte stack slot whose
; upper half is zero.
;   %src     - pointer from which the 4 initializing bytes are loaded (align 1)
;   %byteOff - runtime byte offset into the slot at which the chunk is read
;   %dst     - pointer that receives the loaded <1 x i8> chunk
; The slot is first zero-filled in full, then its first 4 bytes are overwritten
; from %src, so bytes [4, 8) still hold zero when the chunk is loaded.
; Expected output: every instruction is kept; the final store is shown with an
; explicit `align 1`.
define void @load_1byte_chunk_of_8byte_alloca_with_zero_upper_half(ptr %src, i64 %byteOff, ptr %dst) nounwind {
; CHECK-ALL-LABEL: @load_1byte_chunk_of_8byte_alloca_with_zero_upper_half(
; CHECK-ALL-NEXT:    [[INTERMEDIATE:%.*]] = alloca [8 x i8], align 64
; CHECK-ALL-NEXT:    store <8 x i8> zeroinitializer, ptr [[INTERMEDIATE]], align 64
; CHECK-ALL-NEXT:    [[INIT:%.*]] = load <4 x i8>, ptr [[SRC:%.*]], align 1
; CHECK-ALL-NEXT:    store <4 x i8> [[INIT]], ptr [[INTERMEDIATE]], align 64
; CHECK-ALL-NEXT:    [[INTERMEDIATE_OFF_ADDR:%.*]] = getelementptr inbounds i8, ptr [[INTERMEDIATE]], i64 [[BYTEOFF:%.*]]
; CHECK-ALL-NEXT:    [[CHUNK:%.*]] = load <1 x i8>, ptr [[INTERMEDIATE_OFF_ADDR]], align 1
; CHECK-ALL-NEXT:    store <1 x i8> [[CHUNK]], ptr [[DST:%.*]], align 1
; CHECK-ALL-NEXT:    ret void
;
  %intermediate = alloca [8 x i8], align 64
  store <8 x i8> zeroinitializer,  ptr %intermediate, align 64
  %init = load <4 x i8>, ptr %src, align 1
  store <4 x i8> %init, ptr %intermediate, align 64
  %intermediate.off.addr = getelementptr inbounds i8, ptr %intermediate, i64 %byteOff
  %chunk = load <1 x i8>, ptr %intermediate.off.addr, align 1
  store <1 x i8> %chunk, ptr %dst
  ret void
}

; Reads a 2-byte chunk at a runtime byte offset from an 8-byte stack slot whose
; upper half is zero.
;   %src     - pointer from which the 4 initializing bytes are loaded (align 1)
;   %byteOff - runtime byte offset into the slot at which the chunk is read
;   %dst     - pointer that receives the loaded <2 x i8> chunk
; The slot is first zero-filled in full, then its first 4 bytes are overwritten
; from %src, so bytes [4, 8) still hold zero when the chunk is loaded.
; Expected output: every instruction is kept; the final store is shown with an
; explicit `align 2`.
define void @load_2byte_chunk_of_8byte_alloca_with_zero_upper_half(ptr %src, i64 %byteOff, ptr %dst) nounwind {
; CHECK-ALL-LABEL: @load_2byte_chunk_of_8byte_alloca_with_zero_upper_half(
; CHECK-ALL-NEXT:    [[INTERMEDIATE:%.*]] = alloca [8 x i8], align 64
; CHECK-ALL-NEXT:    store <8 x i8> zeroinitializer, ptr [[INTERMEDIATE]], align 64
; CHECK-ALL-NEXT:    [[INIT:%.*]] = load <4 x i8>, ptr [[SRC:%.*]], align 1
; CHECK-ALL-NEXT:    store <4 x i8> [[INIT]], ptr [[INTERMEDIATE]], align 64
; CHECK-ALL-NEXT:    [[INTERMEDIATE_OFF_ADDR:%.*]] = getelementptr inbounds i8, ptr [[INTERMEDIATE]], i64 [[BYTEOFF:%.*]]
; CHECK-ALL-NEXT:    [[CHUNK:%.*]] = load <2 x i8>, ptr [[INTERMEDIATE_OFF_ADDR]], align 1
; CHECK-ALL-NEXT:    store <2 x i8> [[CHUNK]], ptr [[DST:%.*]], align 2
; CHECK-ALL-NEXT:    ret void
;
  %intermediate = alloca [8 x i8], align 64
  store <8 x i8> zeroinitializer,  ptr %intermediate, align 64
  %init = load <4 x i8>, ptr %src, align 1
  store <4 x i8> %init, ptr %intermediate, align 64
  %intermediate.off.addr = getelementptr inbounds i8, ptr %intermediate, i64 %byteOff
  %chunk = load <2 x i8>, ptr %intermediate.off.addr, align 1
  store <2 x i8> %chunk, ptr %dst
  ret void
}

; Reads a 4-byte chunk at a runtime byte offset from an 8-byte stack slot whose
; upper half is zero.
;   %src     - pointer from which the 4 initializing bytes are loaded (align 1)
;   %byteOff - runtime byte offset into the slot at which the chunk is read
;   %dst     - pointer that receives the loaded <4 x i8> chunk
; The slot is first zero-filled in full, then its first 4 bytes are overwritten
; from %src, so bytes [4, 8) still hold zero when the chunk is loaded.
; Expected output: every instruction is kept; the final store is shown with an
; explicit `align 4`.
define void @load_4byte_chunk_of_8byte_alloca_with_zero_upper_half(ptr %src, i64 %byteOff, ptr %dst) nounwind {
; CHECK-ALL-LABEL: @load_4byte_chunk_of_8byte_alloca_with_zero_upper_half(
; CHECK-ALL-NEXT:    [[INTERMEDIATE:%.*]] = alloca [8 x i8], align 64
; CHECK-ALL-NEXT:    store <8 x i8> zeroinitializer, ptr [[INTERMEDIATE]], align 64
; CHECK-ALL-NEXT:    [[INIT:%.*]] = load <4 x i8>, ptr [[SRC:%.*]], align 1
; CHECK-ALL-NEXT:    store <4 x i8> [[INIT]], ptr [[INTERMEDIATE]], align 64
; CHECK-ALL-NEXT:    [[INTERMEDIATE_OFF_ADDR:%.*]] = getelementptr inbounds i8, ptr [[INTERMEDIATE]], i64 [[BYTEOFF:%.*]]
; CHECK-ALL-NEXT:    [[CHUNK:%.*]] = load <4 x i8>, ptr [[INTERMEDIATE_OFF_ADDR]], align 1
; CHECK-ALL-NEXT:    store <4 x i8> [[CHUNK]], ptr [[DST:%.*]], align 4
; CHECK-ALL-NEXT:    ret void
;
  %intermediate = alloca [8 x i8], align 64
  store <8 x i8> zeroinitializer,  ptr %intermediate, align 64
  %init = load <4 x i8>, ptr %src, align 1
  store <4 x i8> %init, ptr %intermediate, align 64
  %intermediate.off.addr = getelementptr inbounds i8, ptr %intermediate, i64 %byteOff
  %chunk = load <4 x i8>, ptr %intermediate.off.addr, align 1
  store <4 x i8> %chunk, ptr %dst
  ret void
}

; Reads a 1-byte chunk at a runtime byte offset from a 16-byte stack slot whose
; upper half is zero.
;   %src     - pointer from which the 8 initializing bytes are loaded (align 1)
;   %byteOff - runtime byte offset into the slot at which the chunk is read
;   %dst     - pointer that receives the loaded <1 x i8> chunk
; The slot is first zero-filled in full, then its first 8 bytes are overwritten
; from %src, so bytes [8, 16) still hold zero when the chunk is loaded.
; Expected output: every instruction is kept; the final store is shown with an
; explicit `align 1`.
define void @load_1byte_chunk_of_16byte_alloca_with_zero_upper_half(ptr %src, i64 %byteOff, ptr %dst) nounwind {
; CHECK-ALL-LABEL: @load_1byte_chunk_of_16byte_alloca_with_zero_upper_half(
; CHECK-ALL-NEXT:    [[INTERMEDIATE:%.*]] = alloca [16 x i8], align 64
; CHECK-ALL-NEXT:    store <16 x i8> zeroinitializer, ptr [[INTERMEDIATE]], align 64
; CHECK-ALL-NEXT:    [[INIT:%.*]] = load <8 x i8>, ptr [[SRC:%.*]], align 1
; CHECK-ALL-NEXT:    store <8 x i8> [[INIT]], ptr [[INTERMEDIATE]], align 64
; CHECK-ALL-NEXT:    [[INTERMEDIATE_OFF_ADDR:%.*]] = getelementptr inbounds i8, ptr [[INTERMEDIATE]], i64 [[BYTEOFF:%.*]]
; CHECK-ALL-NEXT:    [[CHUNK:%.*]] = load <1 x i8>, ptr [[INTERMEDIATE_OFF_ADDR]], align 1
; CHECK-ALL-NEXT:    store <1 x i8> [[CHUNK]], ptr [[DST:%.*]], align 1
; CHECK-ALL-NEXT:    ret void
;
  %intermediate = alloca [16 x i8], align 64
  store <16 x i8> zeroinitializer,  ptr %intermediate, align 64
  %init = load <8 x i8>, ptr %src, align 1
  store <8 x i8> %init, ptr %intermediate, align 64
  %intermediate.off.addr = getelementptr inbounds i8, ptr %intermediate, i64 %byteOff
  %chunk = load <1 x i8>, ptr %intermediate.off.addr, align 1
  store <1 x i8> %chunk, ptr %dst
  ret void
}

; Reads a 2-byte chunk at a runtime byte offset from a 16-byte stack slot whose
; upper half is zero.
;   %src     - pointer from which the 8 initializing bytes are loaded (align 1)
;   %byteOff - runtime byte offset into the slot at which the chunk is read
;   %dst     - pointer that receives the loaded <2 x i8> chunk
; The slot is first zero-filled in full, then its first 8 bytes are overwritten
; from %src, so bytes [8, 16) still hold zero when the chunk is loaded.
; Expected output: every instruction is kept; the final store is shown with an
; explicit `align 2`.
define void @load_2byte_chunk_of_16byte_alloca_with_zero_upper_half(ptr %src, i64 %byteOff, ptr %dst) nounwind {
; CHECK-ALL-LABEL: @load_2byte_chunk_of_16byte_alloca_with_zero_upper_half(
; CHECK-ALL-NEXT:    [[INTERMEDIATE:%.*]] = alloca [16 x i8], align 64
; CHECK-ALL-NEXT:    store <16 x i8> zeroinitializer, ptr [[INTERMEDIATE]], align 64
; CHECK-ALL-NEXT:    [[INIT:%.*]] = load <8 x i8>, ptr [[SRC:%.*]], align 1
; CHECK-ALL-NEXT:    store <8 x i8> [[INIT]], ptr [[INTERMEDIATE]], align 64
; CHECK-ALL-NEXT:    [[INTERMEDIATE_OFF_ADDR:%.*]] = getelementptr inbounds i8, ptr [[INTERMEDIATE]], i64 [[BYTEOFF:%.*]]
; CHECK-ALL-NEXT:    [[CHUNK:%.*]] = load <2 x i8>, ptr [[INTERMEDIATE_OFF_ADDR]], align 1
; CHECK-ALL-NEXT:    store <2 x i8> [[CHUNK]], ptr [[DST:%.*]], align 2
; CHECK-ALL-NEXT:    ret void
;
  %intermediate = alloca [16 x i8], align 64
  store <16 x i8> zeroinitializer,  ptr %intermediate, align 64
  %init = load <8 x i8>, ptr %src, align 1
  store <8 x i8> %init, ptr %intermediate, align 64
  %intermediate.off.addr = getelementptr inbounds i8, ptr %intermediate, i64 %byteOff
  %chunk = load <2 x i8>, ptr %intermediate.off.addr, align 1
  store <2 x i8> %chunk, ptr %dst
  ret void
}

; Reads a 4-byte chunk at a runtime byte offset from a 16-byte stack slot whose
; upper half is zero.
;   %src     - pointer from which the 8 initializing bytes are loaded (align 1)
;   %byteOff - runtime byte offset into the slot at which the chunk is read
;   %dst     - pointer that receives the loaded <4 x i8> chunk
; The slot is first zero-filled in full, then its first 8 bytes are overwritten
; from %src, so bytes [8, 16) still hold zero when the chunk is loaded.
; Expected output: every instruction is kept; the final store is shown with an
; explicit `align 4`.
define void @load_4byte_chunk_of_16byte_alloca_with_zero_upper_half(ptr %src, i64 %byteOff, ptr %dst) nounwind {
; CHECK-ALL-LABEL: @load_4byte_chunk_of_16byte_alloca_with_zero_upper_half(
; CHECK-ALL-NEXT:    [[INTERMEDIATE:%.*]] = alloca [16 x i8], align 64
; CHECK-ALL-NEXT:    store <16 x i8> zeroinitializer, ptr [[INTERMEDIATE]], align 64
; CHECK-ALL-NEXT:    [[INIT:%.*]] = load <8 x i8>, ptr [[SRC:%.*]], align 1
; CHECK-ALL-NEXT:    store <8 x i8> [[INIT]], ptr [[INTERMEDIATE]], align 64
; CHECK-ALL-NEXT:    [[INTERMEDIATE_OFF_ADDR:%.*]] = getelementptr inbounds i8, ptr [[INTERMEDIATE]], i64 [[BYTEOFF:%.*]]
; CHECK-ALL-NEXT:    [[CHUNK:%.*]] = load <4 x i8>, ptr [[INTERMEDIATE_OFF_ADDR]], align 1
; CHECK-ALL-NEXT:    store <4 x i8> [[CHUNK]], ptr [[DST:%.*]], align 4
; CHECK-ALL-NEXT:    ret void
;
  %intermediate = alloca [16 x i8], align 64
  store <16 x i8> zeroinitializer,  ptr %intermediate, align 64
  %init = load <8 x i8>, ptr %src, align 1
  store <8 x i8> %init, ptr %intermediate, align 64
  %intermediate.off.addr = getelementptr inbounds i8, ptr %intermediate, i64 %byteOff
  %chunk = load <4 x i8>, ptr %intermediate.off.addr, align 1
  store <4 x i8> %chunk, ptr %dst
  ret void
}

; Reads an 8-byte chunk at a runtime byte offset from a 16-byte stack slot
; whose upper half is zero.
;   %src     - pointer from which the 8 initializing bytes are loaded (align 1)
;   %byteOff - runtime byte offset into the slot at which the chunk is read
;   %dst     - pointer that receives the loaded <8 x i8> chunk
; The slot is first zero-filled in full, then its first 8 bytes are overwritten
; from %src, so bytes [8, 16) still hold zero when the chunk is loaded.
; Expected output: every instruction is kept; the final store is shown with an
; explicit `align 8`.
define void @load_8byte_chunk_of_16byte_alloca_with_zero_upper_half(ptr %src, i64 %byteOff, ptr %dst) nounwind {
; CHECK-ALL-LABEL: @load_8byte_chunk_of_16byte_alloca_with_zero_upper_half(
; CHECK-ALL-NEXT:    [[INTERMEDIATE:%.*]] = alloca [16 x i8], align 64
; CHECK-ALL-NEXT:    store <16 x i8> zeroinitializer, ptr [[INTERMEDIATE]], align 64
; CHECK-ALL-NEXT:    [[INIT:%.*]] = load <8 x i8>, ptr [[SRC:%.*]], align 1
; CHECK-ALL-NEXT:    store <8 x i8> [[INIT]], ptr [[INTERMEDIATE]], align 64
; CHECK-ALL-NEXT:    [[INTERMEDIATE_OFF_ADDR:%.*]] = getelementptr inbounds i8, ptr [[INTERMEDIATE]], i64 [[BYTEOFF:%.*]]
; CHECK-ALL-NEXT:    [[CHUNK:%.*]] = load <8 x i8>, ptr [[INTERMEDIATE_OFF_ADDR]], align 1
; CHECK-ALL-NEXT:    store <8 x i8> [[CHUNK]], ptr [[DST:%.*]], align 8
; CHECK-ALL-NEXT:    ret void
;
  %intermediate = alloca [16 x i8], align 64
  store <16 x i8> zeroinitializer,  ptr %intermediate, align 64
  %init = load <8 x i8>, ptr %src, align 1
  store <8 x i8> %init, ptr %intermediate, align 64
  %intermediate.off.addr = getelementptr inbounds i8, ptr %intermediate, i64 %byteOff
  %chunk = load <8 x i8>, ptr %intermediate.off.addr, align 1
  store <8 x i8> %chunk, ptr %dst
  ret void
}

; Reads a 1-byte chunk at a runtime byte offset from a 32-byte stack slot whose
; upper half is zero.
;   %src     - pointer from which the 16 initializing bytes are loaded (align 1)
;   %byteOff - runtime byte offset into the slot at which the chunk is read
;   %dst     - pointer that receives the loaded <1 x i8> chunk
; The slot is first zero-filled in full, then its first 16 bytes are
; overwritten from %src, so bytes [16, 32) still hold zero when the chunk is
; loaded.
; Expected output: every instruction is kept; the final store is shown with an
; explicit `align 1`.
define void @load_1byte_chunk_of_32byte_alloca_with_zero_upper_half(ptr %src, i64 %byteOff, ptr %dst) nounwind {
; CHECK-ALL-LABEL: @load_1byte_chunk_of_32byte_alloca_with_zero_upper_half(
; CHECK-ALL-NEXT:    [[INTERMEDIATE:%.*]] = alloca [32 x i8], align 64
; CHECK-ALL-NEXT:    store <32 x i8> zeroinitializer, ptr [[INTERMEDIATE]], align 64
; CHECK-ALL-NEXT:    [[INIT:%.*]] = load <16 x i8>, ptr [[SRC:%.*]], align 1
; CHECK-ALL-NEXT:    store <16 x i8> [[INIT]], ptr [[INTERMEDIATE]], align 64
; CHECK-ALL-NEXT:    [[INTERMEDIATE_OFF_ADDR:%.*]] = getelementptr inbounds i8, ptr [[INTERMEDIATE]], i64 [[BYTEOFF:%.*]]
; CHECK-ALL-NEXT:    [[CHUNK:%.*]] = load <1 x i8>, ptr [[INTERMEDIATE_OFF_ADDR]], align 1
; CHECK-ALL-NEXT:    store <1 x i8> [[CHUNK]], ptr [[DST:%.*]], align 1
; CHECK-ALL-NEXT:    ret void
;
  %intermediate = alloca [32 x i8], align 64
  store <32 x i8> zeroinitializer,  ptr %intermediate, align 64
  %init = load <16 x i8>, ptr %src, align 1
  store <16 x i8> %init, ptr %intermediate, align 64
  %intermediate.off.addr = getelementptr inbounds i8, ptr %intermediate, i64 %byteOff
  %chunk = load <1 x i8>, ptr %intermediate.off.addr, align 1
  store <1 x i8> %chunk, ptr %dst
  ret void
}

; Reads a 2-byte chunk at a runtime byte offset from a 32-byte stack slot whose
; upper half is zero.
;   %src     - pointer from which the 16 initializing bytes are loaded (align 1)
;   %byteOff - runtime byte offset into the slot at which the chunk is read
;   %dst     - pointer that receives the loaded <2 x i8> chunk
; The slot is first zero-filled in full, then its first 16 bytes are
; overwritten from %src, so bytes [16, 32) still hold zero when the chunk is
; loaded.
; Expected output: every instruction is kept; the final store is shown with an
; explicit `align 2`.
define void @load_2byte_chunk_of_32byte_alloca_with_zero_upper_half(ptr %src, i64 %byteOff, ptr %dst) nounwind {
; CHECK-ALL-LABEL: @load_2byte_chunk_of_32byte_alloca_with_zero_upper_half(
; CHECK-ALL-NEXT:    [[INTERMEDIATE:%.*]] = alloca [32 x i8], align 64
; CHECK-ALL-NEXT:    store <32 x i8> zeroinitializer, ptr [[INTERMEDIATE]], align 64
; CHECK-ALL-NEXT:    [[INIT:%.*]] = load <16 x i8>, ptr [[SRC:%.*]], align 1
; CHECK-ALL-NEXT:    store <16 x i8> [[INIT]], ptr [[INTERMEDIATE]], align 64
; CHECK-ALL-NEXT:    [[INTERMEDIATE_OFF_ADDR:%.*]] = getelementptr inbounds i8, ptr [[INTERMEDIATE]], i64 [[BYTEOFF:%.*]]
; CHECK-ALL-NEXT:    [[CHUNK:%.*]] = load <2 x i8>, ptr [[INTERMEDIATE_OFF_ADDR]], align 1
; CHECK-ALL-NEXT:    store <2 x i8> [[CHUNK]], ptr [[DST:%.*]], align 2
; CHECK-ALL-NEXT:    ret void
;
  %intermediate = alloca [32 x i8], align 64
  store <32 x i8> zeroinitializer,  ptr %intermediate, align 64
  %init = load <16 x i8>, ptr %src, align 1
  store <16 x i8> %init, ptr %intermediate, align 64
  %intermediate.off.addr = getelementptr inbounds i8, ptr %intermediate, i64 %byteOff
  %chunk = load <2 x i8>, ptr %intermediate.off.addr, align 1
  store <2 x i8> %chunk, ptr %dst
  ret void
}

; Reads a 4-byte chunk at a runtime byte offset from a 32-byte stack slot whose
; upper half is zero.
;   %src     - pointer from which the 16 initializing bytes are loaded (align 1)
;   %byteOff - runtime byte offset into the slot at which the chunk is read
;   %dst     - pointer that receives the loaded <4 x i8> chunk
; The slot is first zero-filled in full, then its first 16 bytes are
; overwritten from %src, so bytes [16, 32) still hold zero when the chunk is
; loaded.
; Expected output: every instruction is kept; the final store is shown with an
; explicit `align 4`.
define void @load_4byte_chunk_of_32byte_alloca_with_zero_upper_half(ptr %src, i64 %byteOff, ptr %dst) nounwind {
; CHECK-ALL-LABEL: @load_4byte_chunk_of_32byte_alloca_with_zero_upper_half(
; CHECK-ALL-NEXT:    [[INTERMEDIATE:%.*]] = alloca [32 x i8], align 64
; CHECK-ALL-NEXT:    store <32 x i8> zeroinitializer, ptr [[INTERMEDIATE]], align 64
; CHECK-ALL-NEXT:    [[INIT:%.*]] = load <16 x i8>, ptr [[SRC:%.*]], align 1
; CHECK-ALL-NEXT:    store <16 x i8> [[INIT]], ptr [[INTERMEDIATE]], align 64
; CHECK-ALL-NEXT:    [[INTERMEDIATE_OFF_ADDR:%.*]] = getelementptr inbounds i8, ptr [[INTERMEDIATE]], i64 [[BYTEOFF:%.*]]
; CHECK-ALL-NEXT:    [[CHUNK:%.*]] = load <4 x i8>, ptr [[INTERMEDIATE_OFF_ADDR]], align 1
; CHECK-ALL-NEXT:    store <4 x i8> [[CHUNK]], ptr [[DST:%.*]], align 4
; CHECK-ALL-NEXT:    ret void
;
  %intermediate = alloca [32 x i8], align 64
  store <32 x i8> zeroinitializer,  ptr %intermediate, align 64
  %init = load <16 x i8>, ptr %src, align 1
  store <16 x i8> %init, ptr %intermediate, align 64
  %intermediate.off.addr = getelementptr inbounds i8, ptr %intermediate, i64 %byteOff
  %chunk = load <4 x i8>, ptr %intermediate.off.addr, align 1
  store <4 x i8> %chunk, ptr %dst
  ret void
}

; Reads an 8-byte chunk at a runtime byte offset from a 32-byte stack slot
; whose upper half is zero.
;   %src     - pointer from which the 16 initializing bytes are loaded (align 1)
;   %byteOff - runtime byte offset into the slot at which the chunk is read
;   %dst     - pointer that receives the loaded <8 x i8> chunk
; The slot is first zero-filled in full, then its first 16 bytes are
; overwritten from %src, so bytes [16, 32) still hold zero when the chunk is
; loaded.
; Expected output: every instruction is kept; the final store is shown with an
; explicit `align 8`.
define void @load_8byte_chunk_of_32byte_alloca_with_zero_upper_half(ptr %src, i64 %byteOff, ptr %dst) nounwind {
; CHECK-ALL-LABEL: @load_8byte_chunk_of_32byte_alloca_with_zero_upper_half(
; CHECK-ALL-NEXT:    [[INTERMEDIATE:%.*]] = alloca [32 x i8], align 64
; CHECK-ALL-NEXT:    store <32 x i8> zeroinitializer, ptr [[INTERMEDIATE]], align 64
; CHECK-ALL-NEXT:    [[INIT:%.*]] = load <16 x i8>, ptr [[SRC:%.*]], align 1
; CHECK-ALL-NEXT:    store <16 x i8> [[INIT]], ptr [[INTERMEDIATE]], align 64
; CHECK-ALL-NEXT:    [[INTERMEDIATE_OFF_ADDR:%.*]] = getelementptr inbounds i8, ptr [[INTERMEDIATE]], i64 [[BYTEOFF:%.*]]
; CHECK-ALL-NEXT:    [[CHUNK:%.*]] = load <8 x i8>, ptr [[INTERMEDIATE_OFF_ADDR]], align 1
; CHECK-ALL-NEXT:    store <8 x i8> [[CHUNK]], ptr [[DST:%.*]], align 8
; CHECK-ALL-NEXT:    ret void
;
  %intermediate = alloca [32 x i8], align 64
  store <32 x i8> zeroinitializer,  ptr %intermediate, align 64
  %init = load <16 x i8>, ptr %src, align 1
  store <16 x i8> %init, ptr %intermediate, align 64
  %intermediate.off.addr = getelementptr inbounds i8, ptr %intermediate, i64 %byteOff
  %chunk = load <8 x i8>, ptr %intermediate.off.addr, align 1
  store <8 x i8> %chunk, ptr %dst
  ret void
}

; Reads a 16-byte chunk at a runtime byte offset from a 32-byte stack slot
; whose upper half is zero.
;   %src     - pointer from which the 16 initializing bytes are loaded (align 1)
;   %byteOff - runtime byte offset into the slot at which the chunk is read
;   %dst     - pointer that receives the loaded <16 x i8> chunk
; The slot is first zero-filled in full, then its first 16 bytes are
; overwritten from %src, so bytes [16, 32) still hold zero when the chunk is
; loaded.
; Expected output: every instruction is kept; the final store is shown with an
; explicit `align 16`.
define void @load_16byte_chunk_of_32byte_alloca_with_zero_upper_half(ptr %src, i64 %byteOff, ptr %dst) nounwind {
; CHECK-ALL-LABEL: @load_16byte_chunk_of_32byte_alloca_with_zero_upper_half(
; CHECK-ALL-NEXT:    [[INTERMEDIATE:%.*]] = alloca [32 x i8], align 64
; CHECK-ALL-NEXT:    store <32 x i8> zeroinitializer, ptr [[INTERMEDIATE]], align 64
; CHECK-ALL-NEXT:    [[INIT:%.*]] = load <16 x i8>, ptr [[SRC:%.*]], align 1
; CHECK-ALL-NEXT:    store <16 x i8> [[INIT]], ptr [[INTERMEDIATE]], align 64
; CHECK-ALL-NEXT:    [[INTERMEDIATE_OFF_ADDR:%.*]] = getelementptr inbounds i8, ptr [[INTERMEDIATE]], i64 [[BYTEOFF:%.*]]
; CHECK-ALL-NEXT:    [[CHUNK:%.*]] = load <16 x i8>, ptr [[INTERMEDIATE_OFF_ADDR]], align 1
; CHECK-ALL-NEXT:    store <16 x i8> [[CHUNK]], ptr [[DST:%.*]], align 16
; CHECK-ALL-NEXT:    ret void
;
  %intermediate = alloca [32 x i8], align 64
  store <32 x i8> zeroinitializer,  ptr %intermediate, align 64
  %init = load <16 x i8>, ptr %src, align 1
  store <16 x i8> %init, ptr %intermediate, align 64
  %intermediate.off.addr = getelementptr inbounds i8, ptr %intermediate, i64 %byteOff
  %chunk = load <16 x i8>, ptr %intermediate.off.addr, align 1
  store <16 x i8> %chunk, ptr %dst
  ret void
}

; Reads a 1-byte chunk at a runtime byte offset from a 64-byte stack slot whose
; upper half is zero.
;   %src     - pointer from which the 32 initializing bytes are loaded (align 1)
;   %byteOff - runtime byte offset into the slot at which the chunk is read
;   %dst     - pointer that receives the loaded <1 x i8> chunk
; The slot is first zero-filled in full, then its first 32 bytes are
; overwritten from %src, so bytes [32, 64) still hold zero when the chunk is
; loaded.
; Expected output: every instruction is kept; the final store is shown with an
; explicit `align 1`.
define void @load_1byte_chunk_of_64byte_alloca_with_zero_upper_half(ptr %src, i64 %byteOff, ptr %dst) nounwind {
; CHECK-ALL-LABEL: @load_1byte_chunk_of_64byte_alloca_with_zero_upper_half(
; CHECK-ALL-NEXT:    [[INTERMEDIATE:%.*]] = alloca [64 x i8], align 64
; CHECK-ALL-NEXT:    store <64 x i8> zeroinitializer, ptr [[INTERMEDIATE]], align 64
; CHECK-ALL-NEXT:    [[INIT:%.*]] = load <32 x i8>, ptr [[SRC:%.*]], align 1
; CHECK-ALL-NEXT:    store <32 x i8> [[INIT]], ptr [[INTERMEDIATE]], align 64
; CHECK-ALL-NEXT:    [[INTERMEDIATE_OFF_ADDR:%.*]] = getelementptr inbounds i8, ptr [[INTERMEDIATE]], i64 [[BYTEOFF:%.*]]
; CHECK-ALL-NEXT:    [[CHUNK:%.*]] = load <1 x i8>, ptr [[INTERMEDIATE_OFF_ADDR]], align 1
; CHECK-ALL-NEXT:    store <1 x i8> [[CHUNK]], ptr [[DST:%.*]], align 1
; CHECK-ALL-NEXT:    ret void
;
  %intermediate = alloca [64 x i8], align 64
  store <64 x i8> zeroinitializer,  ptr %intermediate, align 64
  %init = load <32 x i8>, ptr %src, align 1
  store <32 x i8> %init, ptr %intermediate, align 64
  %intermediate.off.addr = getelementptr inbounds i8, ptr %intermediate, i64 %byteOff
  %chunk = load <1 x i8>, ptr %intermediate.off.addr, align 1
  store <1 x i8> %chunk, ptr %dst
  ret void
}

; Reads a 2-byte chunk at a runtime byte offset from a 64-byte stack slot whose
; upper half is zero.
;   %src     - pointer from which the 32 initializing bytes are loaded (align 1)
;   %byteOff - runtime byte offset into the slot at which the chunk is read
;   %dst     - pointer that receives the loaded <2 x i8> chunk
; The slot is first zero-filled in full, then its first 32 bytes are
; overwritten from %src, so bytes [32, 64) still hold zero when the chunk is
; loaded.
; Expected output: every instruction is kept; the final store is shown with an
; explicit `align 2`.
define void @load_2byte_chunk_of_64byte_alloca_with_zero_upper_half(ptr %src, i64 %byteOff, ptr %dst) nounwind {
; CHECK-ALL-LABEL: @load_2byte_chunk_of_64byte_alloca_with_zero_upper_half(
; CHECK-ALL-NEXT:    [[INTERMEDIATE:%.*]] = alloca [64 x i8], align 64
; CHECK-ALL-NEXT:    store <64 x i8> zeroinitializer, ptr [[INTERMEDIATE]], align 64
; CHECK-ALL-NEXT:    [[INIT:%.*]] = load <32 x i8>, ptr [[SRC:%.*]], align 1
; CHECK-ALL-NEXT:    store <32 x i8> [[INIT]], ptr [[INTERMEDIATE]], align 64
; CHECK-ALL-NEXT:    [[INTERMEDIATE_OFF_ADDR:%.*]] = getelementptr inbounds i8, ptr [[INTERMEDIATE]], i64 [[BYTEOFF:%.*]]
; CHECK-ALL-NEXT:    [[CHUNK:%.*]] = load <2 x i8>, ptr [[INTERMEDIATE_OFF_ADDR]], align 1
; CHECK-ALL-NEXT:    store <2 x i8> [[CHUNK]], ptr [[DST:%.*]], align 2
; CHECK-ALL-NEXT:    ret void
;
  %intermediate = alloca [64 x i8], align 64
  store <64 x i8> zeroinitializer,  ptr %intermediate, align 64
  %init = load <32 x i8>, ptr %src, align 1
  store <32 x i8> %init, ptr %intermediate, align 64
  %intermediate.off.addr = getelementptr inbounds i8, ptr %intermediate, i64 %byteOff
  %chunk = load <2 x i8>, ptr %intermediate.off.addr, align 1
  store <2 x i8> %chunk, ptr %dst
  ret void
}

; Reads a 4-byte chunk at a runtime byte offset from a 64-byte stack slot whose
; upper half is zero.
;   %src     - pointer from which the 32 initializing bytes are loaded (align 1)
;   %byteOff - runtime byte offset into the slot at which the chunk is read
;   %dst     - pointer that receives the loaded <4 x i8> chunk
; The slot is first zero-filled in full, then its first 32 bytes are
; overwritten from %src, so bytes [32, 64) still hold zero when the chunk is
; loaded.
; Expected output: every instruction is kept; the final store is shown with an
; explicit `align 4`.
define void @load_4byte_chunk_of_64byte_alloca_with_zero_upper_half(ptr %src, i64 %byteOff, ptr %dst) nounwind {
; CHECK-ALL-LABEL: @load_4byte_chunk_of_64byte_alloca_with_zero_upper_half(
; CHECK-ALL-NEXT:    [[INTERMEDIATE:%.*]] = alloca [64 x i8], align 64
; CHECK-ALL-NEXT:    store <64 x i8> zeroinitializer, ptr [[INTERMEDIATE]], align 64
; CHECK-ALL-NEXT:    [[INIT:%.*]] = load <32 x i8>, ptr [[SRC:%.*]], align 1
; CHECK-ALL-NEXT:    store <32 x i8> [[INIT]], ptr [[INTERMEDIATE]], align 64
; CHECK-ALL-NEXT:    [[INTERMEDIATE_OFF_ADDR:%.*]] = getelementptr inbounds i8, ptr [[INTERMEDIATE]], i64 [[BYTEOFF:%.*]]
; CHECK-ALL-NEXT:    [[CHUNK:%.*]] = load <4 x i8>, ptr [[INTERMEDIATE_OFF_ADDR]], align 1
; CHECK-ALL-NEXT:    store <4 x i8> [[CHUNK]], ptr [[DST:%.*]], align 4
; CHECK-ALL-NEXT:    ret void
;
  %intermediate = alloca [64 x i8], align 64
  store <64 x i8> zeroinitializer,  ptr %intermediate, align 64
  %init = load <32 x i8>, ptr %src, align 1
  store <32 x i8> %init, ptr %intermediate, align 64
  %intermediate.off.addr = getelementptr inbounds i8, ptr %intermediate, i64 %byteOff
  %chunk = load <4 x i8>, ptr %intermediate.off.addr, align 1
  store <4 x i8> %chunk, ptr %dst
  ret void
}

; Reads an 8-byte chunk at a runtime byte offset from a 64-byte stack slot
; whose upper half is zero.
;   %src     - pointer from which the 32 initializing bytes are loaded (align 1)
;   %byteOff - runtime byte offset into the slot at which the chunk is read
;   %dst     - pointer that receives the loaded <8 x i8> chunk
; The slot is first zero-filled in full, then its first 32 bytes are
; overwritten from %src, so bytes [32, 64) still hold zero when the chunk is
; loaded.
; Expected output: every instruction is kept; the final store is shown with an
; explicit `align 8`.
define void @load_8byte_chunk_of_64byte_alloca_with_zero_upper_half(ptr %src, i64 %byteOff, ptr %dst) nounwind {
; CHECK-ALL-LABEL: @load_8byte_chunk_of_64byte_alloca_with_zero_upper_half(
; CHECK-ALL-NEXT:    [[INTERMEDIATE:%.*]] = alloca [64 x i8], align 64
; CHECK-ALL-NEXT:    store <64 x i8> zeroinitializer, ptr [[INTERMEDIATE]], align 64
; CHECK-ALL-NEXT:    [[INIT:%.*]] = load <32 x i8>, ptr [[SRC:%.*]], align 1
; CHECK-ALL-NEXT:    store <32 x i8> [[INIT]], ptr [[INTERMEDIATE]], align 64
; CHECK-ALL-NEXT:    [[INTERMEDIATE_OFF_ADDR:%.*]] = getelementptr inbounds i8, ptr [[INTERMEDIATE]], i64 [[BYTEOFF:%.*]]
; CHECK-ALL-NEXT:    [[CHUNK:%.*]] = load <8 x i8>, ptr [[INTERMEDIATE_OFF_ADDR]], align 1
; CHECK-ALL-NEXT:    store <8 x i8> [[CHUNK]], ptr [[DST:%.*]], align 8
; CHECK-ALL-NEXT:    ret void
;
  %intermediate = alloca [64 x i8], align 64
  store <64 x i8> zeroinitializer,  ptr %intermediate, align 64
  %init = load <32 x i8>, ptr %src, align 1
  store <32 x i8> %init, ptr %intermediate, align 64
  %intermediate.off.addr = getelementptr inbounds i8, ptr %intermediate, i64 %byteOff
  %chunk = load <8 x i8>, ptr %intermediate.off.addr, align 1
  store <8 x i8> %chunk, ptr %dst
  ret void
}

; Reads a 16-byte chunk at a runtime byte offset from a 64-byte stack slot
; whose upper half is zero.
;   %src     - pointer from which the 32 initializing bytes are loaded (align 1)
;   %byteOff - runtime byte offset into the slot at which the chunk is read
;   %dst     - pointer that receives the loaded <16 x i8> chunk
; The slot is first zero-filled in full, then its first 32 bytes are
; overwritten from %src, so bytes [32, 64) still hold zero when the chunk is
; loaded.
; Expected output: every instruction is kept; the final store is shown with an
; explicit `align 16`.
define void @load_16byte_chunk_of_64byte_alloca_with_zero_upper_half(ptr %src, i64 %byteOff, ptr %dst) nounwind {
; CHECK-ALL-LABEL: @load_16byte_chunk_of_64byte_alloca_with_zero_upper_half(
; CHECK-ALL-NEXT:    [[INTERMEDIATE:%.*]] = alloca [64 x i8], align 64
; CHECK-ALL-NEXT:    store <64 x i8> zeroinitializer, ptr [[INTERMEDIATE]], align 64
; CHECK-ALL-NEXT:    [[INIT:%.*]] = load <32 x i8>, ptr [[SRC:%.*]], align 1
; CHECK-ALL-NEXT:    store <32 x i8> [[INIT]], ptr [[INTERMEDIATE]], align 64
; CHECK-ALL-NEXT:    [[INTERMEDIATE_OFF_ADDR:%.*]] = getelementptr inbounds i8, ptr [[INTERMEDIATE]], i64 [[BYTEOFF:%.*]]
; CHECK-ALL-NEXT:    [[CHUNK:%.*]] = load <16 x i8>, ptr [[INTERMEDIATE_OFF_ADDR]], align 1
; CHECK-ALL-NEXT:    store <16 x i8> [[CHUNK]], ptr [[DST:%.*]], align 16
; CHECK-ALL-NEXT:    ret void
;
  %intermediate = alloca [64 x i8], align 64
  store <64 x i8> zeroinitializer,  ptr %intermediate, align 64
  %init = load <32 x i8>, ptr %src, align 1
  store <32 x i8> %init, ptr %intermediate, align 64
  %intermediate.off.addr = getelementptr inbounds i8, ptr %intermediate, i64 %byteOff
  %chunk = load <16 x i8>, ptr %intermediate.off.addr, align 1
  store <16 x i8> %chunk, ptr %dst
  ret void
}

; Reads a 32-byte chunk at a runtime byte offset from a 64-byte stack slot
; whose upper half is zero.
;   %src     - pointer from which the 32 initializing bytes are loaded (align 1)
;   %byteOff - runtime byte offset into the slot at which the chunk is read
;   %dst     - pointer that receives the loaded <32 x i8> chunk
; The slot is first zero-filled in full, then its first 32 bytes are
; overwritten from %src, so bytes [32, 64) still hold zero when the chunk is
; loaded.
; Expected output: every instruction is kept; the final store is shown with an
; explicit `align 32`.
define void @load_32byte_chunk_of_64byte_alloca_with_zero_upper_half(ptr %src, i64 %byteOff, ptr %dst) nounwind {
; CHECK-ALL-LABEL: @load_32byte_chunk_of_64byte_alloca_with_zero_upper_half(
; CHECK-ALL-NEXT:    [[INTERMEDIATE:%.*]] = alloca [64 x i8], align 64
; CHECK-ALL-NEXT:    store <64 x i8> zeroinitializer, ptr [[INTERMEDIATE]], align 64
; CHECK-ALL-NEXT:    [[INIT:%.*]] = load <32 x i8>, ptr [[SRC:%.*]], align 1
; CHECK-ALL-NEXT:    store <32 x i8> [[INIT]], ptr [[INTERMEDIATE]], align 64
; CHECK-ALL-NEXT:    [[INTERMEDIATE_OFF_ADDR:%.*]] = getelementptr inbounds i8, ptr [[INTERMEDIATE]], i64 [[BYTEOFF:%.*]]
; CHECK-ALL-NEXT:    [[CHUNK:%.*]] = load <32 x i8>, ptr [[INTERMEDIATE_OFF_ADDR]], align 1
; CHECK-ALL-NEXT:    store <32 x i8> [[CHUNK]], ptr [[DST:%.*]], align 32
; CHECK-ALL-NEXT:    ret void
;
  %intermediate = alloca [64 x i8], align 64
  store <64 x i8> zeroinitializer,  ptr %intermediate, align 64
  %init = load <32 x i8>, ptr %src, align 1
  store <32 x i8> %init, ptr %intermediate, align 64
  %intermediate.off.addr = getelementptr inbounds i8, ptr %intermediate, i64 %byteOff
  %chunk = load <32 x i8>, ptr %intermediate.off.addr, align 1
  store <32 x i8> %chunk, ptr %dst
  ret void
}
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; CHECK-BE-32: {{.*}}
; CHECK-BE-64: {{.*}}
; CHECK-LE-32: {{.*}}
; CHECK-LE-64: {{.*}}
; CHECK-SCALAR: {{.*}}
; CHECK-SCALAR-32: {{.*}}
; CHECK-SCALAR-64: {{.*}}
